msg_tool\scripts\artemis\ast/
text.rs1use super::types::*;
2use crate::utils::escape::*;
3use anyhow::Result;
4use unicode_segmentation::UnicodeSegmentation;
5
/// Escapes the characters that are significant inside a plain text segment.
///
/// `&` is written as `&amp;` and `<` as `&lt;`, so that a literal `<` in a
/// string is not mistaken for the start of a `<...>` tag when the generated
/// text is parsed again. Every other character (including `>` and `"`) is
/// copied through unchanged.
fn escape_text(s: &str) -> String {
    // The escaped form is at least as long as the input.
    let mut escaped = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            _ => escaped.push(c),
        }
    }
    escaped
}
17
18pub struct TextGenerator {
19 data: String,
20}
21
22impl TextGenerator {
23 pub fn new() -> Self {
24 TextGenerator {
25 data: String::new(),
26 }
27 }
28
29 pub fn generate(mut self, v: &Value) -> Result<String> {
30 for (i, item) in v.members().enumerate() {
31 match item {
32 Value::Str(s) => {
33 self.data.push_str(&escape_text(s));
34 }
35 Value::Float(_) => {
36 return Err(anyhow::anyhow!(
37 "Unexpected float value at {} in text: item={:?}, {:?}",
38 i,
39 item,
40 v
41 ));
42 }
43 Value::Int(_) => {
44 return Err(anyhow::anyhow!(
45 "Unexpected int value at {} in text: item={:?}, {:?}",
46 i,
47 item,
48 v
49 ));
50 }
51 Value::KeyVal((k, _)) => {
52 if k != "name" {
53 return Err(anyhow::anyhow!(
54 "Unexpected key at {} in text: item={:?}, {:?}",
55 i,
56 item,
57 v
58 ));
59 }
60 }
61 Value::Array(arr) => {
62 self.data.push('<');
63 let mut first = true;
64 for item in arr {
65 if !first {
66 self.data.push(' ');
67 }
68 first = false;
69 match item {
70 Value::Str(s) => {
71 self.data.push_str(s);
72 }
73 Value::Float(f) => {
74 if f.fract() == 0.0 {
75 self.data.push_str(&format!("{:.1}", f));
76 } else {
77 self.data.push_str(&f.to_string());
78 }
79 }
80 Value::Int(i) => {
81 self.data.push_str(&i.to_string());
82 }
83 Value::KeyVal((k, v)) => {
84 let k = k.as_str().ok_or(anyhow::anyhow!(
85 "Expected key to be a string, but found: {:?}",
86 k
87 ))?;
88 self.data.push_str(k);
89 self.data.push('=');
90 match v.as_ref() {
91 Value::Str(s) => {
92 self.data.push('"');
93 self.data.push_str(&escape_xml_attr_value(s));
94 self.data.push('"');
95 }
96 Value::Float(f) => {
97 if f.fract() == 0.0 {
98 self.data.push_str(&format!("{:.1}", f));
99 } else {
100 self.data.push_str(&f.to_string());
101 }
102 }
103 Value::Int(i) => {
104 self.data.push_str(&i.to_string());
105 }
106 Value::Null => {}
107 _ => {
108 return Err(anyhow::anyhow!(
109 "Unexpected value type in text: item={:?}, {:?}",
110 item,
111 arr
112 ));
113 }
114 }
115 }
116 Value::Array(_) => {
117 return Err(anyhow::anyhow!(
118 "Unexpected nested array in text: item={:?}, {:?}",
119 item,
120 arr
121 ));
122 }
123 _ => {
124 first = true;
125 }
126 }
127 }
128 self.data.push('>');
129 }
130 _ => {}
131 }
132 }
133 Ok(self.data)
134 }
135}
136
137pub struct TextParser<'a> {
138 data: Value,
139 text: Vec<&'a str>,
140 pos: usize,
141 len: usize,
142}
143
144impl<'a> TextParser<'a> {
145 pub fn new(str: &'a str) -> Self {
146 let text: Vec<&'a str> = UnicodeSegmentation::graphemes(str, true).collect();
147 let len = text.len();
148 TextParser {
149 data: Value::new_array(),
150 text,
151 pos: 0,
152 len,
153 }
154 }
155
156 pub fn parse(mut self) -> Result<Value> {
157 while let Some(c) = self.peek() {
158 match c {
159 "<" => {
160 self.parse_array()?;
161 }
162 _ => {
163 let mut text = String::new();
164 self.eat_char();
165 text.push_str(c);
166 while let Some(b) = self.peek() {
167 if b == "<" {
168 break;
169 }
170 text.push_str(b);
171 self.eat_char();
172 }
173 if !text.is_empty() {
174 self.data.push_member(Value::Str(unescape_xml(&text)));
175 }
176 }
177 }
178 }
179 Ok(self.data)
180 }
181
182 fn parse_array(&mut self) -> Result<()> {
183 let mut arr = Value::new_array();
184 self.parse_indent("<")?;
185 loop {
186 let c = self.peek().ok_or(self.error2("Unexpected eof"))?;
187 match c {
188 ">" => {
189 self.eat_char();
190 break;
191 }
192 "-" | "." | "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" => {
193 arr.push_member(self.parse_any_number()?);
194 }
195 " " => {
196 self.eat_char();
197 }
198 _ => {
199 let key = self.parse_key()?;
200 let v = if self.is_indent("=") {
201 self.parse_indent("=")?;
202 let v = match self.peek().ok_or(self.error2("Unexpected eof"))? {
203 "\"" => self.parse_str()?,
204 "-" | "." | "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8"
205 | "9" => self.parse_any_number()?,
206 _ => return self.error("Expected value after key"),
207 };
208 Value::new_kv(key, v)
209 } else {
210 Value::Str(key)
211 };
212 arr.push_member(v);
213 }
214 }
215 }
216 self.data.push_member(arr);
217 Ok(())
218 }
219
220 fn parse_any_number(&mut self) -> Result<Value> {
221 self.erase_whitespace();
222 let mut number = String::new();
223 while let Some(c) = self.peek() {
224 if c == "."
225 || c == "-"
226 || c == "0"
227 || c == "1"
228 || c == "2"
229 || c == "3"
230 || c == "4"
231 || c == "5"
232 || c == "6"
233 || c == "7"
234 || c == "8"
235 || c == "9"
236 {
237 number.push_str(c);
238 self.eat_char();
239 } else {
240 break;
241 }
242 }
243 if number.contains(".") {
244 number
245 .parse()
246 .map(Value::Float)
247 .map_err(|e| self.error2(format!("failed to parse f64: {}", e)))
248 } else {
249 number
250 .parse()
251 .map(Value::Int)
252 .map_err(|e| self.error2(format!("failed to parse i64: {}", e)))
253 }
254 }
255
256 fn parse_key(&mut self) -> Result<String> {
257 self.erase_whitespace();
258 let mut key = String::new();
259 while let Some(c) = self.peek() {
260 if c == "=" || c == " " || c == ">" {
261 break;
262 }
263 key.push_str(c);
264 self.eat_char();
265 }
266 if key.is_empty() {
267 return self.error("Expected key, but found nothing");
268 }
269 Ok(key)
270 }
271
272 fn parse_str(&mut self) -> Result<Value> {
273 self.erase_whitespace();
274 self.parse_indent("\"")?;
275 let mut text = String::new();
276 loop {
277 match self.next().ok_or(self.error2("Unexpected eof"))? {
278 "\"" => {
279 break;
280 }
281 t => {
282 text.push_str(t);
283 }
284 }
285 }
286 Ok(Value::Str(unescape_xml(&text)))
287 }
288
289 fn eat_char(&mut self) {
290 if self.pos < self.len {
291 self.pos += 1;
292 }
293 }
294
295 fn erase_whitespace(&mut self) {
296 while let Some(c) = self.peek() {
297 if c == " " {
298 self.eat_char();
299 } else {
300 break;
301 }
302 }
303 }
304
305 fn is_indent(&self, indent: &str) -> bool {
306 let mut pos = self.pos;
307 for ident in indent.graphemes(true) {
308 if pos >= self.len || self.text[pos] != ident {
309 return false;
310 }
311 pos += 1;
312 }
313 true
314 }
315
316 fn parse_indent(&mut self, indent: &str) -> Result<()> {
317 for ident in indent.graphemes(true) {
318 match self.next() {
319 Some(c) => {
320 if c != ident {
321 return self.error("Unexpected indent");
322 }
323 }
324 None => return self.error("Unexpected eof"),
325 }
326 }
327 Ok(())
328 }
329
330 fn next(&mut self) -> Option<&'a str> {
331 if self.pos < self.len {
332 let item = self.text[self.pos];
333 self.pos += 1;
334 Some(item)
335 } else {
336 None
337 }
338 }
339
340 fn peek(&self) -> Option<&'a str> {
341 if self.pos < self.len {
342 Some(self.text[self.pos])
343 } else {
344 None
345 }
346 }
347
348 fn error2<T>(&self, msg: T) -> anyhow::Error
349 where
350 T: std::fmt::Display,
351 {
352 anyhow::anyhow!("Failed to parse at position {}: {}", self.pos, msg)
353 }
354
355 fn error<T, A>(&self, msg: T) -> Result<A>
356 where
357 T: std::fmt::Display,
358 {
359 Err(anyhow::anyhow!(
360 "Failed to parse at position {}: {}",
361 self.pos,
362 msg
363 ))
364 }
365}
366
/// Generating a text value yields its text segments around one `<...>` tag
/// whose items are space-separated, with floats keeping a fractional part and
/// string attribute values quoted.
#[test]
fn test_gen() {
    let v = Value::Array(vec![
        Value::Str("\"Hello<Dat>".to_string()),
        Value::Array(vec![
            Value::Str("title".to_string()),
            Value::Int(1),
            Value::Float(2.0),
            Value::new_kv("name", "World"),
            Value::new_kv("int", 42),
            Value::new_kv("float", 3.0),
        ]),
        Value::Str(">World".to_string()),
    ]);
    // Text segments are written in escaped form, so their expected spelling is
    // taken from `escape_text` instead of being repeated by hand. The tag
    // layout is pinned literally.
    let expected = format!(
        "{}<title 1 2.0 name=\"World\" int=42 float=3.0>{}",
        escape_text("\"Hello<Dat>"),
        escape_text(">World")
    );
    assert_eq!(TextGenerator::new().generate(&v).unwrap(), expected);
}
386
/// Parsing splits the input into text segments and one tag array, and decodes
/// escaped characters inside text segments.
#[test]
fn test_parse() {
    // A raw `<` always opens a tag, so the literal `<` of the first text
    // segment has to be written as `&lt;` in the input.
    // NOTE(review): relies on `unescape_xml` (crate::utils::escape, not shown
    // here) decoding `&lt;` — confirm.
    let text = "\"Hello&lt;Dat><title 1 2.0 name=\"World\" int=42 float=3.0>>World";
    let v = Value::Array(vec![
        Value::Str("\"Hello<Dat>".to_string()),
        Value::Array(vec![
            Value::Str("title".to_string()),
            Value::Int(1),
            Value::Float(2.0),
            Value::new_kv("name", "World"),
            Value::new_kv("int", 42),
            Value::new_kv("float", 3.0),
        ]),
        Value::Str(">World".to_string()),
    ]);
    assert_eq!(TextParser::new(text).parse().unwrap(), v);
}
403}